* TABLE 5: HETEROGENEITY ANALYSIS: PANEL A: ACTIVE IN LABOR FORCE

cd "$mypathRR/Results/Resident/results_heterogeneity"

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * 
* GENERATE THE TREATMENT (AND CONTROL) GROUP local b " 55 60 60 60"
local b " 60 60"
local t " 80 95"
local n : word count `b'

forvalues i = 1/`n' {
local x : word `i' of `b'
local y : word `i' of `t'
cap drop treat_i
gen treat_i=.
replace treat_i=0 if ctrl_ir_`x'_`y'==1
replace treat_i=1 if taxinc_thre2==1
label var treat_i "Income tax treatment"


* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* DIFF-IN-DIFF RESIDENTS
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*ssc install diff

* ALL RESIDENTS, INCOME CONTROL GROUP 
preserve
drop if year>2010
keep if inlf01_1 == 1


collapse (count) Inmovers=persid (mean) avgtt=avgt_stek_taxable (mean) avgtg=avgt_stek_gross (mean) mtr=mtr_total , by(year treat_i)
label var Inmovers "Residents"
label var avgtt "Average tax rate on taxable income"
label var avgtg "Average tax rate on gross income"
label var mtr "Marginal tax rate on taxable income"
label var treat_i "Treatment"
label var year "Year"

sort treat_i year


list // there are observations in every, year - why N=19 in the regressions?!


gen Period1=0
replace Period1=1 if year>=2006&year<2008
gen DiD1=treat_i*Period1

gen Period2=0
replace Period2=1 if year>=2008
gen DiD2=treat_i*Period2

gen weight=Inmovers

gr tw (connect Inmovers year if treat_i == 0) ///
(connect Inmovers year if treat_i == 1) ///
, legend(order(1 "control `x'-`y'%" 2 "treatment")) ///
xline(2005.1) name(did_inLF10`x'_`y', replace)
graph export "Heterog_ParTrends-inLF_`x'_`y'.pdf", replace

* for the semi-elasticities: average change in tax for treatment group
sort treat_i year

bys treat_i: egen tax_0=mean(avgtt) if year<2006
bys treat_i: egen tax_1=mean(avgtt) if year>2005 & year<2008
bys treat_i: egen tax_2=mean(avgtt) if year>2007 
bys treat_i (year): replace tax_0=tax_0[_n-1] if tax_0==.
bys treat_i (year): replace tax_1=tax_1[_n+1] if tax_1==.
bys treat_i (year): replace tax_1=tax_1[_n+1] if tax_1==.
bys treat_i (year): replace tax_1=tax_1[_n+1] if tax_1==.
bys treat_i (year): replace tax_1=tax_1[_n+1] if tax_1==.
bys treat_i (year): replace tax_1=tax_1[_n+1] if tax_1==.
bys treat_i (year): replace tax_2=tax_2[_n+1] if tax_2==.
bys treat_i (year): replace tax_2=tax_2[_n+1] if tax_2==.
bys treat_i (year): replace tax_2=tax_2[_n+1] if tax_2==.
bys treat_i (year): replace tax_2=tax_2[_n+1] if tax_2==.
bys treat_i (year): replace tax_2=tax_2[_n+1] if tax_2==.

gen delta_tax_06=tax_1-tax_0 if treat_i==1
sum delta_tax_06
local delta_tax_06 = abs(`r(mean)')
local delta_tax_062 = `delta_tax_06'*100
global delta_tax_2006_`x'_`y'A = `delta_tax_062'

gen delta_tax_08=tax_2-tax_1 if treat_i==1
sum delta_tax_08
local delta_tax_08 = abs(`r(mean)')
local delta_tax_082 = `delta_tax_08'*100


*  Reduced Form 
display _newline(5)
display "Reduced Form, 2 treatments (06 & 08), control group: `x'-`y'%"

sum Inmovers if year <2006 & year >2000 & treat_i == 1
scalar define meanoutcome = `r(mean)'
global mean_incomers`x'_`y'A = meanoutcome

reg Inmovers DiD1 DiD2 treat_i  ib2005.year, vce(robust)
scalar define semi_e_06 = _b[DiD1]/`delta_tax_06'/100
scalar define semi_e_08 = _b[DiD2]/`delta_tax_08'/100

 outreg2  using "DiD_Residents_inctreat_`x'_`y'_uw-2tr-inLF.xls", excel label  e(F) dec(2) replace ///
  keep(DiD1 DiD2 ib2005.year treat_i ) ctitle("Taxpayers", "Reduced", "(level)") title("DiD Estimations Residents only, Control Group: `x'-`y'% of Threshold") ///
  addnote("Robust standard errors in parentheses.") ///
  addstat($ \Delta \tau_{2006} $ \% pts, `delta_tax_062', Semi-elasticity 2006, semi_e_06, $ \Delta \tau_{2008} $ \% pts, `delta_tax_082', Semi-elasticity 2008, semi_e_08)

  
  
  		local did_`x'_`y'_B = _b[DiD1]
		local se_`x'_`y'_B = _se[DiD1]
		local pval = 2*(1-normal(abs(_b[DiD1]/_se[DiD1])))
			display "`pval'"
			gen star="" 
			replace star="*" in 1 if `pval'<.1 &`pval'>=.05 //print star if p-value between
			replace star="**"  in 1 if `pval'<.05 &`pval'>=.01
			replace star="***"  in 1 if `pval'<.01
			local p = star[1]
				display "`p'"
		local did_`x'_`y'_B = trim("`: display %5.2f `did_`x'_`y'_B''")
		global DiD_`x'_`y'_BA = "`did_`x'_`y'_B'`p'"
		global SE_`x'_`y'_BA= trim("`: display %5.2f `se_`x'_`y'_B''")
			drop star
		display "${DiD_`x'_`y'_BA}"
		
		
replace Inmovers = ln(Inmovers)

sum Inmovers if year <2006 & year >2000 & treat_i == 1
scalar define lnmeanoutcome = `r(mean)'
global lnmean_incomers`x'_`y'A = lnmeanoutcome

display _newline(5)
display "Reduced Form (in logs), 2 treatments (06 & 08), control group: `x'-`y'%" 
reg Inmovers DiD1 DiD2 treat_i  ib2005.year, vce(robust)   
 outreg2  using "DiD_Residents_inctreat_`x'_`y'_uw-2tr-inLF.xls", excel label  e(F) dec(3) append ///
  addnote("Robust standard errors in parentheses.") ///
  keep(DiD1 DiD2  ib2005.year treat_i ) ctitle(" ", "Reduced", "(log)") 

 		local did_`x'_`y'_C = _b[DiD1]
		local se_`x'_`y'_C = _se[DiD1]
		local pval = 2*(1-normal(abs(_b[DiD1]/_se[DiD1])))
			display "`pval'"
			gen star="" 
			replace star="*" in 1 if `pval'<.1 &`pval'>=.05 //print star if p-value between
			replace star="**"  in 1 if `pval'<.05 &`pval'>=.01
			replace star="***"  in 1 if `pval'<.01
			local p = star[1]
				display "`p'"
		local did_`x'_`y'_C = trim("`: display %5.3f `did_`x'_`y'_C''")
		global DiD_`x'_`y'_CA = "`did_`x'_`y'_C'`p'"
		global SE_`x'_`y'_CA= trim("`: display %5.3f `se_`x'_`y'_C''")
			drop star
* 2SLS
gen lnnet_avgtt = ln(1-avgtt)
label var lnnet_avgtt "$\varepsilon_{\bar{\tau}}$"

	// HAUSMAN TEST (put this first, to include results in regression tables
	* 	Hypothesis H0: tax rates are exogenous
	*	Note:  	1) if all vars are exogenous, both 2sls and ols are consistent
	*			2) if all vars are exogenous, ols is more efficient, but it's inconsistent if violated

	foreach var in lnnet_avgtt {
		ivreg2 Inmovers treat_i   ib2005.year (`var' = DiD1) 
		estimates store iv_`var'

		reg Inmovers `var' treat_i ib2005.year
		estimates store ols_`var'

		hausman iv_`var' ols_`var'
		scalar chi_`var' = r(chi2)
		scalar p_`var' = r(p)
	} 

		foreach var in lnnet_avgtt {
		display `chi_`var''
		}

		foreach var in lnnet_avgtt {
		display `p_`var''
		}



display _newline(5)
display "IV-Regression, 2 treatments, avg. NTR, control group: `x'-`y'%" 

ivreg2 Inmovers treat_i  ib2005.year (lnnet_avgtt = DiD1 DiD2) , first savefirst redundant(DiD1 DiD2) robust  
est store ivreg_avgttr_`x'_`y'



di e(sstatp)
di e(archi2p)

//statistics to be saved in first-stage table:
display "first stage statistics"
// empty the locals
local Fstat
local Fstat_alt1
local Fstat_alt2
local widstat
local idstat
local idp
local esstat
local estatp
local arf
local arfp
local archi2
local archi2p
local sstat
local sstatp

// save stats as locals with nice display format
local idstat 	= trim("`: display %5.2f e(idstat)'")
local idp 		= trim("`: display %5.3f e(idp)'")
local esstat 	= trim("`: display %5.2f e(estat)'")
local estatp 	= trim("`: display %5.3f e(estatp)'")
local arf 		= trim("`: display %5.2f e(arf)'")
local arfp 		= trim("`: display %5.3f e(arfp)'")
local archi2 	= trim("`: display %5.2f e(archi2)'")
local archi2p	= trim("`: display %5.3f e(archi2p)'")
local sstat 	= trim("`: display %5.2f e(sstat)'")
local sstatp 	= trim("`: display %5.3f e(sstatp)'")
	
// F-Test
local widstat = trim("`: display %5.2f e(widstat)'") // F statistic for weak identification (Cragg-Donald or Kleibergen-Paap)
local Fstat_alt1 = trim("`: display %5.2f e(cdf)'") // should be the same as widstat, but not valid when errors are not iid! (e.g., robust)
local Fstat_alt2 = trim("`: display %5.2f e(F)'")	// overall F-test of 2nd stage?
global Ftest_`x'_`y'_2trA = `widstat'
global Ftest2_`x'_`y'_2trA = `Fstat_alt1'
global Ftest3_`x'_`y'_2trA = `Fstat_alt2'

// compute p-value for F-Test widstat
	local df1 = e(ardf)
	local df2 = e(ardf_r)
	local F = e(widstat)

	display "These are the parameters for the F-Test: F( `df1', `df2', `F')"
	local pvalF = Ftail(`df1', `df2', `F')
		display "This is the F-stat pvalue for `x'-`y'% group: `pvalF'"
				gen starF = ""
				replace starF="*" in 1 if `pvalF'<.1 &`pvalF'>=.05 //print star if p-value between
				replace starF="**"  in 1 if `pvalF'<.05 &`pvalF'>=.01
				replace starF="***"  in 1 if `pvalF'<.01
				local pF = starF[1]
					display "Stars for F-stat pvalue for `x'_`y': `pF'"
		global Ftest_`x'_`y'_st_2trA = "`widstat'`pF'"
		local Ftest_pval = trim("`: display %5.3f `pvalF''")
		global Ftest_pval_`x'_`y'_2trA = `Ftest_pval'
				drop starF


 outreg2  using "DiD_Residents_inctreat_`x'_`y'_uw-2tr-inLF.xls", excel label  dec(3)  append ///
 keep(lnnet_avgtt DiD1 DiD2 ib2005.year treat_i ) ctitle("Marg. tax", "2SLS", "(log)")  ///
  addnote("Robust standard errors in parentheses.") ///
  addstat(Hausman$ ^1$ , chi_lnnet_avgtt, P-value, p_lnnet_avgtt )
 
		
 outreg2  using "DiD_Resultstable_Residents_`x'_`y'_uw-inLF.xls", excel label dec(3)  append ///
 keep( lnnet_avgtt treat_i ib2005.year treat_i ) ///
  sortvar(DiD lnnet_avgtt treat_i ib2005.year) ctitle(" ", "2SLS", "(log)")  ///
  addnote("Robust standard errors in parentheses.") ///
  addstat(Hausman$ ^1$ , chi_lnnet_avgtt, P-value, p_lnnet_avgtt )
  
// save coefficients, standard errors and stars for final output table
  		local se_`x'_`y'_EA = _se[lnnet_avgtt]
		local e_`x'_`y'_EA = _b[lnnet_avgtt]
		local pval = 2*(1-normal(abs(_b[lnnet_avgtt]/_se[lnnet_avgtt])))
			display "`pval'"
			gen star="" 
			replace star="*" in 1 if `pval'<.1 &`pval'>=.05 //print star if p-value between
			replace star="**"  in 1 if `pval'<.05 &`pval'>=.01
			replace star="***"  in 1 if `pval'<.01
			local p = star[1]
				display "`p'"
		local e_`x'_`y'_EA = trim("`: display %5.3f `e_`x'_`y'_EA''")
		global E10_`x'_`y'_EA = "`e_`x'_`y'_EA'`p'"
		global SE10_`x'_`y'_EA= trim("`: display %5.3f `se_`x'_`y'_EA''")
			drop star
	

 
estimates restore _ivreg2_lnnet_avgtt
outreg2  using "DiD_Residents_inctreat_first_`x'_`y'_uw-2tr-inLF.xls",  excel label replace  ///
   addstat(	/*"Cragg-Donald F statistic", `Fstat_alt1', "F", `Fstat_alt2',*/ ///
   			"Weak identification F test$^a$", `widstat', "\hspace{0.5cm}\textit{P-value}", `Ftest_pval', ///
            "Underidentification test$^b$", `idstat', "\hspace{0.5cm}\textit{P-value}", `idp' , ///
			 "$\chi^2$ test  significance endog. regressors$^c$", `archi2' , "\hspace{0.5cm}\textit{P-value}", `archi2p' ,  ///
			 "S statistic significance endog. regressors$^d$", `sstat', "\hspace{0.5cm}\textit{P-value}", `sstatp') ///
			 /*"Anderson-Rubin F-test of significance of endogenous regressors", `arf', "\hspace{0.5cm}\textit{P-value}", `arfp',*/ ///
			 /*"Endogeneity test (C statistic or difference-in-Sargan stat)", `esstat', "\hspace{0.5cm}\textit{P-value}", `estatp'*/  ///
			 addnote("Robust standard errors in parentheses.") ///
	keep(lnnet_avgtt DiD1 DiD2 ib2005.year treat_i ) ctitle("2001-10", "Residents", "`x'-`y'\%")  

	

* OLS model, no IV 
display _newline(5)
display "OLS, no IV, avg. NTR, control group: `x'-`y'%"
reg Inmovers lnnet_avgtt treat_i  ib2005.year, vce(robust) 
 outreg2  using "DiD_Residents_inctreat_`x'_`y'_uw-2tr-inLF.xls", excel label  e(F)  dec(3)  append ///
 addnote("Robust standard errors in parentheses.") ///
 keep(lnnet_avgtt ib2005.year treat_i ) ctitle(" ", "OLS", "(log)") 
  
est restore ivreg_avgttr_`x'_`y'
 outreg2 using "DiD_Resultstable_Residents_`x'_`y'_uw-inLF.xls", excel label dec(3)  append ///
 keep(lnnet_avgtt ib2005.year treat_i ) sortvar(DiD lnnet_avgtt treat_i ib2005.year) ctitle(" ", "2SLS", "(log)")  ///
  addnote("Robust standard errors in parentheses.") ///
  addstat(Hausman$ ^1$ , chi_lnnet_avgtt, P-value, p_lnnet_avgtt )
 
restore
cap rm "DiD_Residents_inctreat_first_`x'_`y'_uw-2tr-inLF.txt"
cap rm "DiD_Resultstable_Residents_`x'_`y'_uw-inLF.txt"
cap rm "DiD_Residents_inctreat_`x'_`y'_uw-2tr-inLF.txt"
}
